Preprocessing QC statistics ¶

June 2025¶

In [1]:
import os
# Project root (outputs are written below it) and the root holding the input images.
NOVA_HOME = '/home/projects/hornsteinlab/Collaboration/NOVA'
NOVA_DATA_HOME = '/home/projects/hornsteinlab/Collaboration/MOmaps'

# The logs and the QC figures sit side by side under one preprocessing run directory.
_PREPROCESSING_RUN_DIR = os.path.join(
    NOVA_HOME, 'outputs', 'preprocessing', 'ManuscriptFinalData_80pct', 'neuronsDay8'
)
LOGS_PATH = os.path.join(_PREPROCESSING_RUN_DIR, 'logs')
PLOT_PATH = os.path.join(_PREPROCESSING_RUN_DIR, 'QC_figures')
# NOTE(review): presumably required so that the `tools.*` imports below and the
# relative notebook path used by the HTML export cell resolve from the repo root — confirm.
os.chdir(NOVA_HOME)
import contextlib
import io
import subprocess

import pandas as pd
from IPython.display import display, Javascript

from tools.preprocessing_tools.qc_reports.qc_utils import log_files_qc, run_validate_folder_structure, display_diff, sample_and_calc_variance, \
                                                show_site_survival_dapi_brenner, show_site_survival_dapi_cellpose, \
                                                show_site_survival_dapi_tiling, show_site_survival_target_brenner, \
                                                calc_total_sums, plot_filtering_heatmap, show_total_sum_tables, \
                                                plot_cell_count, plot_catplot, plot_hm_combine_batches, plot_hm, \
                                                run_calc_hist_new
                                                
from tools.preprocessing_tools.qc_reports.qc_config import panels, markers, marker_info, cell_lines, cell_lines_to_cond,\
                                    cell_lines_for_disp, reps, line_colors, lines_order, custom_palette,\
                                    expected_dapi_raw
%load_ext autoreload
%autoreload 2
In [2]:
# choose batches: batch5 through batch9 (the range end is exclusive)
batches = ['batch{}'.format(batch_number) for batch_number in range(5, 10)]
batches
Out[2]:
['batch5', 'batch6', 'batch7', 'batch8', 'batch9']
In [3]:
# Read the preprocessing logs of the selected batches into a single table.
df = log_files_qc(LOGS_PATH, batches)

# Split the rows into the DAPI channel and every other (target) marker.
# `.copy()` turns each subset into an independent frame, so helpers that later
# assign columns on it do not trigger pandas' SettingWithCopyWarning.
is_dapi = df.marker == 'DAPI'
df_dapi = df[is_dapi].copy()
df_target = df[~is_dapi].copy()
reading logs of batch8
reading logs of batch5
reading logs of batch6
reading logs of batch9
reading logs of batch7

Total of 5 files were read.
Before dup handeling  (296016, 21)
After duplication removal #1: (296016, 22)
After duplication removal #2: (296016, 22)

Actual Files Validation¶

Raw Files Validation¶

  1. How many site tiff files do we have in each folder?
  2. Are all existing files valid? (tif, at least 2049kB, not corrupted)
In [4]:
# Root of the raw SpinningDisk images under the data home.
root_directory_raw = os.path.join(NOVA_DATA_HOME, 'input', 'images', 'raw', 'SpinningDisk')

# Drop the "_16bit_no_downsample" suffix from any batch name that carries it.
batches_raw = [batch_name.replace("_16bit_no_downsample", "") for batch_name in batches]

# NOTE(review): the second positional argument is False here and True for the
# processed tree — presumably an "is processed" flag; confirm against qc_utils.
raws = run_validate_folder_structure(
    root_directory_raw, False, panels, markers, PLOT_PATH, marker_info,
    cell_lines_to_cond, reps, cell_lines_for_disp, expected_dapi_raw,
    batches=batches_raw,
)
batch5
Folder structure is valid.
No bad files are found.
Total Sites:  64800
========
batch6
Folder structure is valid.
No bad files are found.
Total Sites:  64800
========
batch7
Folder structure is valid.
No bad files are found.
Total Sites:  64800
========
batch8
Folder structure is valid.
No bad files are found.
Total Sites:  64800
========
batch9
Folder structure is valid.
No bad files are found.
Total Sites:  64800
========
====================

Processed Files Validation¶

  1. How many site npy files do we have in each folder? -> How many sites survived the pre-processing?
  2. Are all existing files valid? (at least 100kB, npy not corrupted)
In [5]:
# Root of the processed images for this dataset under the data home.
root_directory_proc = os.path.join(
    NOVA_DATA_HOME, 'input', 'images', 'processed', 'ManuscriptFinalData_80pct', 'neuronsDay8'
)

# NOTE(review): the second positional argument is True here and False for the
# raw tree — presumably an "is processed" flag; confirm against qc_utils.
procs = run_validate_folder_structure(
    root_directory_proc, True, panels, markers, PLOT_PATH, marker_info,
    cell_lines_to_cond, reps, cell_lines_for_disp, expected_dapi_raw,
    batches=batches,
)
batch5
Folder structure is invalid. Missing 11 paths:
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch5/FUSHomozygous/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch5/TDP43/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch5/TBK1/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch5/WT/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch5/WT/stress/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch5/FUSRevertant/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch5/OPTN/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch5/FUSHeterozygous/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch5/SCNA/Untreated/mitotracker
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch5/SCNA/Untreated/DCP1A
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch5/SCNA/Untreated/TIA1
No bad files are found.
Total Sites:  48460
========
batch6
Folder structure is invalid. Missing 9 paths:
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch6/FUSHomozygous/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch6/TDP43/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch6/TBK1/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch6/WT/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch6/WT/stress/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch6/FUSRevertant/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch6/OPTN/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch6/FUSHeterozygous/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch6/SCNA/Untreated/TIA1
No bad files are found.
Total Sites:  60739
========
batch7
Folder structure is invalid. Missing 9 paths:
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch7/FUSHomozygous/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch7/TDP43/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch7/TBK1/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch7/WT/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch7/WT/stress/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch7/FUSRevertant/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch7/OPTN/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch7/FUSHeterozygous/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch7/SCNA/Untreated/TIA1
No bad files are found.
Total Sites:  61466
========
batch8
Folder structure is invalid. Missing 9 paths:
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch8/FUSHomozygous/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch8/TDP43/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch8/TBK1/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch8/WT/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch8/WT/stress/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch8/FUSRevertant/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch8/OPTN/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch8/FUSHeterozygous/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch8/SCNA/Untreated/TIA1
No bad files are found.
Total Sites:  60957
========
batch9
Folder structure is invalid. Missing 9 paths:
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch9/FUSHomozygous/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch9/TDP43/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch9/TBK1/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch9/WT/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch9/WT/stress/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch9/FUSRevertant/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch9/OPTN/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch9/FUSHeterozygous/Untreated/TIA1
/home/projects/hornsteinlab/Collaboration/MOmaps/input/images/processed/ManuscriptFinalData_80pct/neuronsDay8/batch9/SCNA/Untreated/TIA1
No bad files are found.
Total Sites:  61080
========
====================

Difference between Raw and Processed¶

In [6]:
# Compare, batch by batch, the raw and processed validation results computed in the two cells above.
display_diff(batches, raws, procs, PLOT_PATH)
batch5
========
batch6
========
batch7
========
batch8
========
batch9
========

Variance in each batch (of processed files)¶

In [7]:
# Print one variance value per batch; whatever the helper writes to stdout is discarded.
for batch in batches:
    discarded_output = io.StringIO()
    with contextlib.redirect_stdout(discarded_output):
        var = sample_and_calc_variance(
            root_directory_proc,
            batch,
            sample_size_per_markers=200,
            num_markers=26,
        )
    print(f'{batch} var: ', var)
batch5 var:  0.014656935894448232
batch6 var:  0.016318794177940892
batch7 var:  0.015915956429535656
batch8 var:  0.016245487083204658
batch9 var:  0.01658928809417996

Preprocessing Filtering qc¶

By order of filtering

1. % site survival after Brenner on DAPI channel¶

Percentage out of the total sites

In [8]:
# First filtering step; the returned table is the input of the Cellpose step below.
dapi_filter_by_brenner = show_site_survival_dapi_brenner(
    df_dapi, batches, line_colors, panels, reps
)

2. % Site survival after Cellpose¶

Percentage out of the sites that passed the previous filter. In parentheses are absolute values.

A site will be filtered out if Cellpose found 0 cells in it.

In [9]:
# Second filtering step: takes the Brenner result and feeds the tiling step below.
dapi_filter_by_cellpose = show_site_survival_dapi_cellpose(
    df_dapi, batches, dapi_filter_by_brenner, line_colors, panels, reps
)

3. % Site survival by tiling¶

Percentage out of the sites that passed the previous filter. In parentheses are absolute values.

A site will be filtered out if, after tiling, no tile contains at least one whole cell that Cellpose detected.

In [10]:
# Third filtering step: takes the Cellpose result and feeds the target-channel step below.
dapi_filter_by_tiling = show_site_survival_dapi_tiling(
    df_dapi, batches, dapi_filter_by_cellpose, line_colors, panels, reps
)

4. % Site survival after Brenner on target channel¶

Percentage out of the sites that passed the previous filter. In parentheses are absolute values (if different from the percentages).

In [11]:
# Last filtering step: target-channel survival, relative to the sites that passed tiling.
show_site_survival_target_brenner(
    df_dapi, df_target, dapi_filter_by_tiling, markers
)

Statistics About the Processed Files¶

In [12]:
# Log columns handed to calc_total_sums for aggregation.
stats = [
    'n_valid_tiles',
    'site_whole_cells_counts_sum',
    'site_cell_count',
    'site_cell_count_sum',
]
total_sum = calc_total_sums(df_target, df_dapi, stats, markers)

Total tiles¶

In [13]:
# Sum the valid tiles over every marker whose name does not contain "TIA".
without_tia = ~total_sum.marker.str.contains('TIA', regex=True)
total_sum.loc[without_tia, 'n_valid_tiles'].sum()
Out[13]:
3213360
In [14]:
## Total tiles in wt lines
# Markers containing "TIA" are left out; only the two WT conditions are kept.
without_tia = ~total_sum.marker.str.contains('TIA', regex=True)
is_wt_line = total_sum.cell_line_cond.isin(['WT stress', 'WT Untreated'])
total_sum.loc[without_tia & is_wt_line, 'n_valid_tiles'].sum()
Out[14]:
822738
In [15]:
## Total tiles in untreated lines
# Keep rows whose condition does not mention "WT", plus the 'WT Untreated' rows;
# markers containing "TIA" are left out.
without_tia = ~total_sum.marker.str.contains('TIA', regex=True)
is_non_wt = ~total_sum.cell_line_cond.str.contains('WT')
is_wt_untreated = total_sum.cell_line_cond == 'WT Untreated'
total_sum.loc[without_tia & (is_non_wt | is_wt_untreated), 'n_valid_tiles'].sum()
Out[15]:
2816265

Total whole nuclei in tiles¶

In [16]:
# Whole-cell counts are summed over the DAPI rows only.
is_dapi_row = total_sum.marker == 'DAPI'
total_sum.loc[is_dapi_row, 'site_whole_cells_counts_sum'].sum()
Out[16]:
689149.0

Total nuclei in sites¶

In [17]:
# Site-level cell counts are summed over the DAPI rows only.
is_dapi_row = total_sum.marker == 'DAPI'
total_sum.loc[is_dapi_row, 'site_cell_count'].sum()
Out[17]:
1721557.0

Total tiles in training batches¶

In [18]:
# Valid tiles in the training batches (batch7 and batch8), TIA1 excluded.
# The batches are matched by exact name: the previous substring pattern '7|8'
# would also have matched any other batch whose name contains a 7 or an 8
# (e.g. batch17 or batch18).
# NOTE(review): assumes the `batch` column holds names of the form 'batch7' — confirm.
in_training_batches = total_sum.batch.isin(['batch7', 'batch8'])
without_tia1 = ~total_sum.marker.str.contains('TIA1', regex=True)
total_sum.loc[in_training_batches & without_tia1, 'n_valid_tiles'].sum()
Out[18]:
1296188

Training: Total whole nuclei in tiles¶

In [19]:
# Whole-cell count over the DAPI rows of the training batches (batch7 and batch8).
# The batches are matched by exact name: the previous substring pattern '7|8'
# would also have matched any other batch whose name contains a 7 or an 8
# (e.g. batch17 or batch18).
# NOTE(review): assumes the `batch` column holds names of the form 'batch7' — confirm.
in_training_batches = total_sum.batch.isin(['batch7', 'batch8'])
is_dapi_row = total_sum.marker == 'DAPI'
total_sum.loc[in_training_batches & is_dapi_row, 'site_whole_cells_counts_sum'].sum()
Out[19]:
274468.0
In [20]:
# Show the summary-statistics tables derived from total_sum (one per batch, then all batches together).
show_total_sum_tables(total_sum)
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch5
count 620.000000 620.000000 620.000000 620.000000
mean 797.748387 7.977484 577.695161 1345.653226
std 532.103687 5.321037 390.174513 924.290458
min 1.000000 0.010000 1.000000 1.000000
25% 284.750000 2.847500 205.750000 451.000000
50% 852.500000 8.525000 598.000000 1434.000000
75% 1252.000000 12.520000 902.000000 2108.000000
max 1715.000000 17.150000 1280.000000 2987.000000
sum 494604.000000 NaN 358171.000000 834305.000000
expected_count 450.000000 450.000000 450.000000 450.000000
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch6
count 627.000000 627.000000 627.000000 6.270000e+02
mean 1134.452951 11.344530 743.421053 1.957804e+03
std 286.694384 2.866944 193.942726 5.264662e+02
min 48.000000 0.480000 36.000000 8.400000e+01
25% 930.500000 9.305000 612.000000 1.578000e+03
50% 1169.000000 11.690000 755.000000 1.987000e+03
75% 1332.000000 13.320000 863.000000 2.332000e+03
max 1669.000000 16.690000 1189.000000 2.993000e+03
sum 711302.000000 NaN 466125.000000 1.227543e+06
expected_count 450.000000 450.000000 450.000000 4.500000e+02
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch7
count 630.000000 630.000000 630.000000 6.300000e+02
mean 1050.255556 10.502556 715.953968 1.747508e+03
std 285.715774 2.857158 202.380671 5.070049e+02
min 206.000000 2.060000 137.000000 3.220000e+02
25% 839.750000 8.397500 569.000000 1.393250e+03
50% 1057.000000 10.570000 710.000000 1.765000e+03
75% 1265.000000 12.650000 859.500000 2.129750e+03
max 1808.000000 18.080000 1259.000000 3.162000e+03
sum 661661.000000 NaN 451051.000000 1.100930e+06
expected_count 450.000000 450.000000 450.000000 4.500000e+02
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch8
count 629.000000 629.00000 629.000000 6.290000e+02
mean 1008.786963 10.08787 670.869634 1.691099e+03
std 277.006006 2.77006 187.150172 4.966986e+02
min 59.000000 0.59000 46.000000 9.100000e+01
25% 836.000000 8.36000 546.000000 1.366000e+03
50% 1002.000000 10.02000 663.000000 1.686000e+03
75% 1199.000000 11.99000 801.000000 2.066000e+03
max 1731.000000 17.31000 1190.000000 2.996000e+03
sum 634527.000000 NaN 421977.000000 1.063701e+06
expected_count 450.000000 450.00000 450.000000 4.500000e+02
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch9
count 627.000000 627.000000 627.000000 6.270000e+02
mean 1134.395534 11.343955 753.824561 1.943244e+03
std 313.470280 3.134703 218.649641 5.661427e+02
min 154.000000 1.540000 97.000000 2.360000e+02
25% 917.000000 9.170000 593.500000 1.519000e+03
50% 1175.000000 11.750000 763.000000 2.038000e+03
75% 1375.000000 13.750000 923.500000 2.368000e+03
max 1813.000000 18.130000 1323.000000 3.215000e+03
sum 711266.000000 NaN 472648.000000 1.218414e+06
expected_count 450.000000 450.000000 450.000000 4.500000e+02
n valid tiles % valid tiles site_whole_cells_counts_sum site_cell_count
All batches
count 3.133000e+03 3133.000000 3.133000e+03 3.133000e+03
mean 1.025650e+03 10.256495 6.926179e+02 1.737917e+03
std 3.728328e+02 3.728328 2.578664e+02 6.621438e+02
min 1.000000e+00 0.010000 1.000000e+00 1.000000e+00
25% 8.140000e+02 8.140000 5.450000e+02 1.349000e+03
50% 1.057000e+03 10.570000 7.070000e+02 1.782000e+03
75% 1.293000e+03 12.930000 8.620000e+02 2.225000e+03
max 1.813000e+03 18.130000 1.323000e+03 3.215000e+03
sum 3.213360e+06 NaN 2.169972e+06 5.444893e+06
expected_count 4.500000e+02 450.000000 4.500000e+02 4.500000e+02

Show Total Tile Counts¶

For each batch, cell line, replicate and marker: total number of tiles

In [21]:
# Expose the tile counts under the column name 'index' before plotting.
# NOTE(review): presumably 'index' is the value column plot_filtering_heatmap reads — confirm.
to_heatmap = total_sum.rename(columns={'n_valid_tiles': 'index'})
plot_filtering_heatmap(
    to_heatmap,
    extra_index='marker',
    vmin=None,
    vmax=None,
    xlabel='Total number of tiles',
    show_sum=True,
    figsize=(6, 8),
)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:381: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:381: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:381: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:381: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:381: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)

Show Total Whole Cell Counts¶

For each batch, cell line, replicate and marker: total number of whole cells

In [22]:
# Expose the whole-cell counts under the column name 'index' before plotting.
# NOTE(review): presumably 'index' is the value column plot_filtering_heatmap reads — confirm.
to_heatmap = total_sum.rename(columns={'site_whole_cells_counts_sum': 'index'})
plot_filtering_heatmap(
    to_heatmap,
    extra_index='marker',
    vmin=None,
    vmax=None,
    xlabel='Total number of whole cells',
    show_sum=True,
    figsize=(6, 8),
)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:381: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:381: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:381: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:381: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:381: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)

Show Cell Count Statistics per Batch¶

In [23]:
# Drop the DAPI rows whose n_valid_tiles is 0.
df_no_empty_sites = df_dapi[df_dapi.n_valid_tiles != 0]

# One figure per count column, drawn in this order.
cell_count_plots = [
    ('site_cell_count_sum', 'Cell Count Average per Site (from tiles)'),
    ('site_whole_cells_counts_sum', 'Whole Cell Count Average per Site'),
    ('site_cell_count', 'Cellpose Cell Count Average per Site'),
]
for count_column, plot_title in cell_count_plots:
    plot_cell_count(df_no_empty_sites, lines_order, custom_palette,
                    y=count_column, title=plot_title)

Show Tiles per Site Statistics¶

In [24]:
# Mean number of valid tiles per site for each cell line / condition.
df_dapi.groupby('cell_line_cond')['n_valid_tiles'].mean()
Out[24]:
cell_line_cond
FUSHeterozygous     7.278482
FUSHomozygous      12.740322
FUSRevertant       11.098691
OPTN                9.221037
SNCA                6.176806
TBK1               13.147027
TDP43               9.711553
WT Untreated       12.301475
WT stress          11.454986
Name: n_valid_tiles, dtype: float64
In [25]:
# NOTE(review): batch_min=4 although the batches selected above start at batch5 —
# confirm the intended range.
plot_catplot(
    df_dapi,
    custom_palette,
    reps,
    x='n_valid_tiles',
    x_title='valid tiles count',
    batch_min=4,
    batch_max=9,
)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1017: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'batch_rep'] = df['batch'] + " " + df['rep']

Show Mean of cell count in valid tiles¶

In [26]:
# Heatmaps over the DAPI rows: cell lines as rows, panels as columns, split by replicate.
plot_hm(
    df_dapi,
    split_by='rep',
    rows='cell_line',
    columns='panel',
    vmax=3,
)
In [27]:
# Overall mean of the per-site "cells in valid tiles" average (DAPI rows).
df_dapi.loc[:, ['cells_count_in_valid_tiles_mean']].mean()
Out[27]:
cells_count_in_valid_tiles_mean    1.43989
dtype: float64
In [28]:
# Overall mean of the per-site cell count (DAPI rows).
df_dapi.loc[:, ['site_cell_count']].mean()
Out[28]:
site_cell_count    17.489455
dtype: float64
In [29]:
## Only batches 7&8
# Same heatmap layout as above, with the two batches and both replicates combined.
plot_hm_combine_batches(
    df_dapi,
    batches=['batch7', 'batch8'],
    reps=['rep1', 'rep2'],
    rows='cell_line',
    columns='panel',
    vmax=3,
)

Assessing Staining Reproducibility and Outliers¶

In [ ]:
# One histogram report per batch, each followed by a separator line.
separator = "=" * 30
for batch in batches:
    print(batch)
    run_calc_hist_new(
        batch, cell_lines_for_disp, markers, root_directory_raw, root_directory_proc,
        hist_sample=10, sample_size_per_markers=200, ncols=7, nrows=4,
    )
    print(separator)
In [ ]:
# Save the notebook, then export it to HTML. The HTML is written to
# <NOVA_HOME>/manuscript/preprocessing_qc_reports, not next to the notebook.
# NOTE(review): `IPython.notebook.save_checkpoint()` looks like the classic-Notebook
# JavaScript API and the save is presumably asynchronous, so the export may pick up
# the previously saved state — confirm.
display(Javascript('IPython.notebook.save_checkpoint();'))

notebook_path = os.path.join('tools', 'preprocessing_tools', 'qc_reports', 'qc_report_d8.ipynb')
html_path = os.path.join(NOVA_HOME, 'manuscript', 'preprocessing_qc_reports', 'qc_report_d8.html')

# An argument list (no shell) keeps the paths from being re-parsed by a shell.
# check=True raises CalledProcessError when nbconvert fails instead of only
# returning a status code; cwd pins the directory the relative path resolves against.
nbconvert_run = subprocess.run(
    ['jupyter', 'nbconvert', '--to', 'html', notebook_path, '--output', html_path],
    check=True,
    cwd=NOVA_HOME,
)
In [ ]: